Introduction.

The Company
A bike-share program that features more than 5,800 bicycles and 600 docking stations. It sets itself apart by also offering reclining bikes, hand tricycles, and cargo bikes, making bike-share more inclusive to people with disabilities and riders who can’t use a standard two-wheeled bike.

The Users
Users are classified into two types: casual riders and annual members.

The Goal
The Department of Marketing wants to design strategies aimed at converting casual riders into annual members.

The Method
In order to accomplish the goal, the Marketing Analyst team needs to better understand how annual members and casual riders differ, why casual riders would buy a membership, and how digital media could affect their marketing tactics.


Business Task.

We have to analyze historical trip data to identify characteristics, trends, and connections in how members and casual users ride the bikes. The stakeholders will use the results to develop and approve an appropriate marketing strategy.

Stakeholders
The stakeholders are:


Data Loading.

The data has been made available by Motivate International Inc. under this license.
This is public data that we can use to explore but data-privacy issues prohibit us from using riders’ personally identifiable information.

The data to analyze covers January to December 2021 and contains more than 5 million records. It is reliable and can provide information relevant to the business task.

Package loading

# Load necessary packages (previously installed).

library(tidyverse)   # to wrangle data
library(dplyr)       # to manipulate data
library(lubridate)   # to parse and manipulate dates
library(sqldf)       # to perform SQL queries
library(ggplot2)     # to visualize data
library(cowplot)     # to improve visualizations
library(wordcloud)   # to improve visualizations
library(ggwordcloud) # to improve visualizations
library(ggmap)       # to work with maps

# Results hidden

Data loading

# Read each monthly 2021 trip file from the "Datos" folder into its own
# data frame, one CSV per month (202101 through 202112).

trips_2021_01 <- read.csv(file = "Datos/202101-divvy-tripdata.csv")
trips_2021_02 <- read.csv(file = "Datos/202102-divvy-tripdata.csv")
trips_2021_03 <- read.csv(file = "Datos/202103-divvy-tripdata.csv")
trips_2021_04 <- read.csv(file = "Datos/202104-divvy-tripdata.csv")
trips_2021_05 <- read.csv(file = "Datos/202105-divvy-tripdata.csv")
trips_2021_06 <- read.csv(file = "Datos/202106-divvy-tripdata.csv")
trips_2021_07 <- read.csv(file = "Datos/202107-divvy-tripdata.csv")
trips_2021_08 <- read.csv(file = "Datos/202108-divvy-tripdata.csv")
trips_2021_09 <- read.csv(file = "Datos/202109-divvy-tripdata.csv")
trips_2021_10 <- read.csv(file = "Datos/202110-divvy-tripdata.csv")
trips_2021_11 <- read.csv(file = "Datos/202111-divvy-tripdata.csv")
trips_2021_12 <- read.csv(file = "Datos/202112-divvy-tripdata.csv")

Data Processing.

Check consistency

# Check column names

colnames(trips_2021_01)
##  [1] "ride_id"            "rideable_type"      "started_at"        
##  [4] "ended_at"           "start_station_name" "start_station_id"  
##  [7] "end_station_name"   "end_station_id"     "start_lat"         
## [10] "start_lng"          "end_lat"            "end_lng"           
## [13] "member_casual"
colnames(trips_2021_02)
##  [1] "ride_id"            "rideable_type"      "started_at"        
##  [4] "ended_at"           "start_station_name" "start_station_id"  
##  [7] "end_station_name"   "end_station_id"     "start_lat"         
## [10] "start_lng"          "end_lat"            "end_lng"           
## [13] "member_casual"
colnames(trips_2021_03)
##  [1] "ride_id"            "rideable_type"      "started_at"        
##  [4] "ended_at"           "start_station_name" "start_station_id"  
##  [7] "end_station_name"   "end_station_id"     "start_lat"         
## [10] "start_lng"          "end_lat"            "end_lng"           
## [13] "member_casual"
colnames(trips_2021_04)
##  [1] "ride_id"            "rideable_type"      "started_at"        
##  [4] "ended_at"           "start_station_name" "start_station_id"  
##  [7] "end_station_name"   "end_station_id"     "start_lat"         
## [10] "start_lng"          "end_lat"            "end_lng"           
## [13] "member_casual"
colnames(trips_2021_05)
##  [1] "ride_id"            "rideable_type"      "started_at"        
##  [4] "ended_at"           "start_station_name" "start_station_id"  
##  [7] "end_station_name"   "end_station_id"     "start_lat"         
## [10] "start_lng"          "end_lat"            "end_lng"           
## [13] "member_casual"
colnames(trips_2021_06)
##  [1] "ride_id"            "rideable_type"      "started_at"        
##  [4] "ended_at"           "start_station_name" "start_station_id"  
##  [7] "end_station_name"   "end_station_id"     "start_lat"         
## [10] "start_lng"          "end_lat"            "end_lng"           
## [13] "member_casual"
colnames(trips_2021_07)
##  [1] "ride_id"            "rideable_type"      "started_at"        
##  [4] "ended_at"           "start_station_name" "start_station_id"  
##  [7] "end_station_name"   "end_station_id"     "start_lat"         
## [10] "start_lng"          "end_lat"            "end_lng"           
## [13] "member_casual"
colnames(trips_2021_08)
##  [1] "ride_id"            "rideable_type"      "started_at"        
##  [4] "ended_at"           "start_station_name" "start_station_id"  
##  [7] "end_station_name"   "end_station_id"     "start_lat"         
## [10] "start_lng"          "end_lat"            "end_lng"           
## [13] "member_casual"
colnames(trips_2021_09)
##  [1] "ride_id"            "rideable_type"      "started_at"        
##  [4] "ended_at"           "start_station_name" "start_station_id"  
##  [7] "end_station_name"   "end_station_id"     "start_lat"         
## [10] "start_lng"          "end_lat"            "end_lng"           
## [13] "member_casual"
colnames(trips_2021_10)
##  [1] "ride_id"            "rideable_type"      "started_at"        
##  [4] "ended_at"           "start_station_name" "start_station_id"  
##  [7] "end_station_name"   "end_station_id"     "start_lat"         
## [10] "start_lng"          "end_lat"            "end_lng"           
## [13] "member_casual"
colnames(trips_2021_11)
##  [1] "ride_id"            "rideable_type"      "started_at"        
##  [4] "ended_at"           "start_station_name" "start_station_id"  
##  [7] "end_station_name"   "end_station_id"     "start_lat"         
## [10] "start_lng"          "end_lat"            "end_lng"           
## [13] "member_casual"
colnames(trips_2021_12)
##  [1] "ride_id"            "rideable_type"      "started_at"        
##  [4] "ended_at"           "start_station_name" "start_station_id"  
##  [7] "end_station_name"   "end_station_id"     "start_lat"         
## [10] "start_lng"          "end_lat"            "end_lng"           
## [13] "member_casual"
# Check structure

str(trips_2021_01)
## 'data.frame':    96834 obs. of  13 variables:
##  $ ride_id           : chr  "E19E6F1B8D4C42ED" "DC88F20C2C55F27F" "EC45C94683FE3F27" "4FA453A75AE377DB" ...
##  $ rideable_type     : chr  "electric_bike" "electric_bike" "electric_bike" "electric_bike" ...
##  $ started_at        : chr  "2021-01-23 16:14:19" "2021-01-27 18:43:08" "2021-01-21 22:35:54" "2021-01-07 13:31:13" ...
##  $ ended_at          : chr  "2021-01-23 16:24:44" "2021-01-27 18:47:12" "2021-01-21 22:37:14" "2021-01-07 13:42:55" ...
##  $ start_station_name: chr  "California Ave & Cortez St" "California Ave & Cortez St" "California Ave & Cortez St" "California Ave & Cortez St" ...
##  $ start_station_id  : chr  "17660" "17660" "17660" "17660" ...
##  $ end_station_name  : chr  "" "" "" "" ...
##  $ end_station_id    : chr  "" "" "" "" ...
##  $ start_lat         : num  41.9 41.9 41.9 41.9 41.9 ...
##  $ start_lng         : num  -87.7 -87.7 -87.7 -87.7 -87.7 ...
##  $ end_lat           : num  41.9 41.9 41.9 41.9 41.9 ...
##  $ end_lng           : num  -87.7 -87.7 -87.7 -87.7 -87.7 ...
##  $ member_casual     : chr  "member" "member" "member" "member" ...
str(trips_2021_02)
## 'data.frame':    49622 obs. of  13 variables:
##  $ ride_id           : chr  "89E7AA6C29227EFF" "0FEFDE2603568365" "E6159D746B2DBB91" "B32D3199F1C2E75B" ...
##  $ rideable_type     : chr  "classic_bike" "classic_bike" "electric_bike" "classic_bike" ...
##  $ started_at        : chr  "2021-02-12 16:14:56" "2021-02-14 17:52:38" "2021-02-09 19:10:18" "2021-02-02 17:49:41" ...
##  $ ended_at          : chr  "2021-02-12 16:21:43" "2021-02-14 18:12:09" "2021-02-09 19:19:10" "2021-02-02 17:54:06" ...
##  $ start_station_name: chr  "Glenwood Ave & Touhy Ave" "Glenwood Ave & Touhy Ave" "Clark St & Lake St" "Wood St & Chicago Ave" ...
##  $ start_station_id  : chr  "525" "525" "KA1503000012" "637" ...
##  $ end_station_name  : chr  "Sheridan Rd & Columbia Ave" "Bosworth Ave & Howard St" "State St & Randolph St" "Honore St & Division St" ...
##  $ end_station_id    : chr  "660" "16806" "TA1305000029" "TA1305000034" ...
##  $ start_lat         : num  42 42 41.9 41.9 41.8 ...
##  $ start_lng         : num  -87.7 -87.7 -87.6 -87.7 -87.6 ...
##  $ end_lat           : num  42 42 41.9 41.9 41.8 ...
##  $ end_lng           : num  -87.7 -87.7 -87.6 -87.7 -87.6 ...
##  $ member_casual     : chr  "member" "casual" "member" "member" ...
str(trips_2021_03)
## 'data.frame':    228496 obs. of  13 variables:
##  $ ride_id           : chr  "CFA86D4455AA1030" "30D9DC61227D1AF3" "846D87A15682A284" "994D05AA75A168F2" ...
##  $ rideable_type     : chr  "classic_bike" "classic_bike" "classic_bike" "classic_bike" ...
##  $ started_at        : chr  "2021-03-16 08:32:30" "2021-03-28 01:26:28" "2021-03-11 21:17:29" "2021-03-11 13:26:42" ...
##  $ ended_at          : chr  "2021-03-16 08:36:34" "2021-03-28 01:36:55" "2021-03-11 21:33:53" "2021-03-11 13:55:41" ...
##  $ start_station_name: chr  "Humboldt Blvd & Armitage Ave" "Humboldt Blvd & Armitage Ave" "Shields Ave & 28th Pl" "Winthrop Ave & Lawrence Ave" ...
##  $ start_station_id  : chr  "15651" "15651" "15443" "TA1308000021" ...
##  $ end_station_name  : chr  "Stave St & Armitage Ave" "Central Park Ave & Bloomingdale Ave" "Halsted St & 35th St" "Broadway & Sheridan Rd" ...
##  $ end_station_id    : chr  "13266" "18017" "TA1308000043" "13323" ...
##  $ start_lat         : num  41.9 41.9 41.8 42 42 ...
##  $ start_lng         : num  -87.7 -87.7 -87.6 -87.7 -87.7 ...
##  $ end_lat           : num  41.9 41.9 41.8 42 42.1 ...
##  $ end_lng           : num  -87.7 -87.7 -87.6 -87.6 -87.7 ...
##  $ member_casual     : chr  "casual" "casual" "casual" "casual" ...
str(trips_2021_04)
## 'data.frame':    337230 obs. of  13 variables:
##  $ ride_id           : chr  "6C992BD37A98A63F" "1E0145613A209000" "E498E15508A80BAD" "1887262AD101C604" ...
##  $ rideable_type     : chr  "classic_bike" "docked_bike" "docked_bike" "classic_bike" ...
##  $ started_at        : chr  "2021-04-12 18:25:36" "2021-04-27 17:27:11" "2021-04-03 12:42:45" "2021-04-17 09:17:42" ...
##  $ ended_at          : chr  "2021-04-12 18:56:55" "2021-04-27 18:31:29" "2021-04-07 11:40:24" "2021-04-17 09:42:48" ...
##  $ start_station_name: chr  "State St & Pearson St" "Dorchester Ave & 49th St" "Loomis Blvd & 84th St" "Honore St & Division St" ...
##  $ start_station_id  : chr  "TA1307000061" "KA1503000069" "20121" "TA1305000034" ...
##  $ end_station_name  : chr  "Southport Ave & Waveland Ave" "Dorchester Ave & 49th St" "Loomis Blvd & 84th St" "Southport Ave & Waveland Ave" ...
##  $ end_station_id    : chr  "13235" "KA1503000069" "20121" "13235" ...
##  $ start_lat         : num  41.9 41.8 41.7 41.9 41.7 ...
##  $ start_lng         : num  -87.6 -87.6 -87.7 -87.7 -87.7 ...
##  $ end_lat           : num  41.9 41.8 41.7 41.9 41.7 ...
##  $ end_lng           : num  -87.7 -87.6 -87.7 -87.7 -87.7 ...
##  $ member_casual     : chr  "member" "casual" "casual" "member" ...
str(trips_2021_05)
## 'data.frame':    531633 obs. of  13 variables:
##  $ ride_id           : chr  "C809ED75D6160B2A" "DD59FDCE0ACACAF3" "0AB83CB88C43EFC2" "7881AC6D39110C60" ...
##  $ rideable_type     : chr  "electric_bike" "electric_bike" "electric_bike" "electric_bike" ...
##  $ started_at        : chr  "2021-05-30 11:58:15" "2021-05-30 11:29:14" "2021-05-30 14:24:01" "2021-05-30 14:25:51" ...
##  $ ended_at          : chr  "2021-05-30 12:10:39" "2021-05-30 12:14:09" "2021-05-30 14:25:13" "2021-05-30 14:41:04" ...
##  $ start_station_name: chr  "" "" "" "" ...
##  $ start_station_id  : chr  "" "" "" "" ...
##  $ end_station_name  : chr  "" "" "" "" ...
##  $ end_station_id    : chr  "" "" "" "" ...
##  $ start_lat         : num  41.9 41.9 41.9 41.9 41.9 ...
##  $ start_lng         : num  -87.6 -87.6 -87.7 -87.7 -87.7 ...
##  $ end_lat           : num  41.9 41.8 41.9 41.9 41.9 ...
##  $ end_lng           : num  -87.6 -87.6 -87.7 -87.7 -87.7 ...
##  $ member_casual     : chr  "casual" "casual" "casual" "casual" ...
str(trips_2021_06)
## 'data.frame':    729595 obs. of  13 variables:
##  $ ride_id           : chr  "99FEC93BA843FB20" "06048DCFC8520CAF" "9598066F68045DF2" "B03C0FE48C412214" ...
##  $ rideable_type     : chr  "electric_bike" "electric_bike" "electric_bike" "electric_bike" ...
##  $ started_at        : chr  "2021-06-13 14:31:28" "2021-06-04 11:18:02" "2021-06-04 09:49:35" "2021-06-03 19:56:05" ...
##  $ ended_at          : chr  "2021-06-13 14:34:11" "2021-06-04 11:24:19" "2021-06-04 09:55:34" "2021-06-03 20:21:55" ...
##  $ start_station_name: chr  "" "" "" "" ...
##  $ start_station_id  : chr  "" "" "" "" ...
##  $ end_station_name  : chr  "" "" "" "" ...
##  $ end_station_id    : chr  "" "" "" "" ...
##  $ start_lat         : num  41.8 41.8 41.8 41.8 41.8 ...
##  $ start_lng         : num  -87.6 -87.6 -87.6 -87.6 -87.6 ...
##  $ end_lat           : num  41.8 41.8 41.8 41.8 41.8 ...
##  $ end_lng           : num  -87.6 -87.6 -87.6 -87.6 -87.6 ...
##  $ member_casual     : chr  "member" "member" "member" "member" ...
str(trips_2021_07)
## 'data.frame':    822410 obs. of  13 variables:
##  $ ride_id           : chr  "0A1B623926EF4E16" "B2D5583A5A5E76EE" "6F264597DDBF427A" "379B58EAB20E8AA5" ...
##  $ rideable_type     : chr  "docked_bike" "classic_bike" "classic_bike" "classic_bike" ...
##  $ started_at        : chr  "2021-07-02 14:44:36" "2021-07-07 16:57:42" "2021-07-25 11:30:55" "2021-07-08 22:08:30" ...
##  $ ended_at          : chr  "2021-07-02 15:19:58" "2021-07-07 17:16:09" "2021-07-25 11:48:45" "2021-07-08 22:23:32" ...
##  $ start_station_name: chr  "Michigan Ave & Washington St" "California Ave & Cortez St" "Wabash Ave & 16th St" "California Ave & Cortez St" ...
##  $ start_station_id  : chr  "13001" "17660" "SL-012" "17660" ...
##  $ end_station_name  : chr  "Halsted St & North Branch St" "Wood St & Hubbard St" "Rush St & Hubbard St" "Carpenter St & Huron St" ...
##  $ end_station_id    : chr  "KA1504000117" "13432" "KA1503000044" "13196" ...
##  $ start_lat         : num  41.9 41.9 41.9 41.9 41.9 ...
##  $ start_lng         : num  -87.6 -87.7 -87.6 -87.7 -87.7 ...
##  $ end_lat           : num  41.9 41.9 41.9 41.9 41.9 ...
##  $ end_lng           : num  -87.6 -87.7 -87.6 -87.7 -87.7 ...
##  $ member_casual     : chr  "casual" "casual" "member" "member" ...
str(trips_2021_08)
## 'data.frame':    804352 obs. of  13 variables:
##  $ ride_id           : chr  "99103BB87CC6C1BB" "EAFCCCFB0A3FC5A1" "9EF4F46C57AD234D" "5834D3208BFAF1DA" ...
##  $ rideable_type     : chr  "electric_bike" "electric_bike" "electric_bike" "electric_bike" ...
##  $ started_at        : chr  "2021-08-10 17:15:49" "2021-08-10 17:23:14" "2021-08-21 02:34:23" "2021-08-21 06:52:55" ...
##  $ ended_at          : chr  "2021-08-10 17:22:44" "2021-08-10 17:39:24" "2021-08-21 02:50:36" "2021-08-21 07:08:13" ...
##  $ start_station_name: chr  "" "" "" "" ...
##  $ start_station_id  : chr  "" "" "" "" ...
##  $ end_station_name  : chr  "" "" "" "" ...
##  $ end_station_id    : chr  "" "" "" "" ...
##  $ start_lat         : num  41.8 41.8 42 42 41.8 ...
##  $ start_lng         : num  -87.7 -87.7 -87.7 -87.7 -87.6 ...
##  $ end_lat           : num  41.8 41.8 42 42 41.8 ...
##  $ end_lng           : num  -87.7 -87.6 -87.7 -87.7 -87.6 ...
##  $ member_casual     : chr  "member" "member" "member" "member" ...
str(trips_2021_09)
## 'data.frame':    756147 obs. of  13 variables:
##  $ ride_id           : chr  "9DC7B962304CBFD8" "F930E2C6872D6B32" "6EF72137900BB910" "78D1DE133B3DBF55" ...
##  $ rideable_type     : chr  "electric_bike" "electric_bike" "electric_bike" "electric_bike" ...
##  $ started_at        : chr  "2021-09-28 16:07:10" "2021-09-28 14:24:51" "2021-09-28 00:20:16" "2021-09-28 14:51:17" ...
##  $ ended_at          : chr  "2021-09-28 16:09:54" "2021-09-28 14:40:05" "2021-09-28 00:23:57" "2021-09-28 15:00:06" ...
##  $ start_station_name: chr  "" "" "" "" ...
##  $ start_station_id  : chr  "" "" "" "" ...
##  $ end_station_name  : chr  "" "" "" "" ...
##  $ end_station_id    : chr  "" "" "" "" ...
##  $ start_lat         : num  41.9 41.9 41.8 41.8 41.9 ...
##  $ start_lng         : num  -87.7 -87.6 -87.7 -87.7 -87.7 ...
##  $ end_lat           : num  41.9 42 41.8 41.8 41.9 ...
##  $ end_lng           : num  -87.7 -87.7 -87.7 -87.7 -87.7 ...
##  $ member_casual     : chr  "casual" "casual" "casual" "casual" ...
str(trips_2021_10)
## 'data.frame':    631226 obs. of  13 variables:
##  $ ride_id           : chr  "620BC6107255BF4C" "4471C70731AB2E45" "26CA69D43D15EE14" "362947F0437E1514" ...
##  $ rideable_type     : chr  "electric_bike" "electric_bike" "electric_bike" "electric_bike" ...
##  $ started_at        : chr  "2021-10-22 12:46:42" "2021-10-21 09:12:37" "2021-10-16 16:28:39" "2021-10-16 16:17:48" ...
##  $ ended_at          : chr  "2021-10-22 12:49:50" "2021-10-21 09:14:14" "2021-10-16 16:36:26" "2021-10-16 16:19:03" ...
##  $ start_station_name: chr  "Kingsbury St & Kinzie St" "" "" "" ...
##  $ start_station_id  : chr  "KA1503000043" "" "" "" ...
##  $ end_station_name  : chr  "" "" "" "" ...
##  $ end_station_id    : chr  "" "" "" "" ...
##  $ start_lat         : num  41.9 41.9 41.9 41.9 41.9 ...
##  $ start_lng         : num  -87.6 -87.7 -87.7 -87.7 -87.7 ...
##  $ end_lat           : num  41.9 41.9 41.9 41.9 41.9 ...
##  $ end_lng           : num  -87.6 -87.7 -87.7 -87.7 -87.7 ...
##  $ member_casual     : chr  "member" "member" "member" "member" ...
str(trips_2021_11)
## 'data.frame':    359978 obs. of  13 variables:
##  $ ride_id           : chr  "7C00A93E10556E47" "90854840DFD508BA" "0A7D10CDD144061C" "2F3BE33085BCFF02" ...
##  $ rideable_type     : chr  "electric_bike" "electric_bike" "electric_bike" "electric_bike" ...
##  $ started_at        : chr  "2021-11-27 13:27:38" "2021-11-27 13:38:25" "2021-11-26 22:03:34" "2021-11-27 09:56:49" ...
##  $ ended_at          : chr  "2021-11-27 13:46:38" "2021-11-27 13:56:10" "2021-11-26 22:05:56" "2021-11-27 10:01:50" ...
##  $ start_station_name: chr  "" "" "" "" ...
##  $ start_station_id  : chr  "" "" "" "" ...
##  $ end_station_name  : chr  "" "" "" "" ...
##  $ end_station_id    : chr  "" "" "" "" ...
##  $ start_lat         : num  41.9 42 42 41.9 41.9 ...
##  $ start_lng         : num  -87.7 -87.7 -87.7 -87.8 -87.6 ...
##  $ end_lat           : num  42 41.9 42 41.9 41.9 ...
##  $ end_lng           : num  -87.7 -87.7 -87.7 -87.8 -87.6 ...
##  $ member_casual     : chr  "casual" "casual" "casual" "casual" ...
str(trips_2021_12)
## 'data.frame':    247540 obs. of  13 variables:
##  $ ride_id           : chr  "46F8167220E4431F" "73A77762838B32FD" "4CF42452054F59C5" "3278BA87BF698339" ...
##  $ rideable_type     : chr  "electric_bike" "electric_bike" "electric_bike" "classic_bike" ...
##  $ started_at        : chr  "2021-12-07 15:06:07" "2021-12-11 03:43:29" "2021-12-15 23:10:28" "2021-12-26 16:16:10" ...
##  $ ended_at          : chr  "2021-12-07 15:13:42" "2021-12-11 04:10:23" "2021-12-15 23:23:14" "2021-12-26 16:30:53" ...
##  $ start_station_name: chr  "Laflin St & Cullerton St" "LaSalle Dr & Huron St" "Halsted St & North Branch St" "Halsted St & North Branch St" ...
##  $ start_station_id  : chr  "13307" "KP1705001026" "KA1504000117" "KA1504000117" ...
##  $ end_station_name  : chr  "Morgan St & Polk St" "Clarendon Ave & Leland Ave" "Broadway & Barry Ave" "LaSalle Dr & Huron St" ...
##  $ end_station_id    : chr  "TA1307000130" "TA1307000119" "13137" "KP1705001026" ...
##  $ start_lat         : num  41.9 41.9 41.9 41.9 41.9 ...
##  $ start_lng         : num  -87.7 -87.6 -87.6 -87.6 -87.7 ...
##  $ end_lat           : num  41.9 42 41.9 41.9 41.9 ...
##  $ end_lng           : num  -87.7 -87.7 -87.6 -87.6 -87.6 ...
##  $ member_casual     : chr  "member" "casual" "member" "member" ...

Conclusions so far:

First, and given the previous results, we can join the twelve data frames into a single one.

# Stack the twelve monthly data frames, in calendar order, into a single
# data frame covering the whole year.
trips_2021 <- bind_rows(
  trips_2021_01, trips_2021_02, trips_2021_03, trips_2021_04,
  trips_2021_05, trips_2021_06, trips_2021_07, trips_2021_08,
  trips_2021_09, trips_2021_10, trips_2021_11, trips_2021_12
)

Check new Data Frame

colnames(trips_2021)  # column names
##  [1] "ride_id"            "rideable_type"      "started_at"        
##  [4] "ended_at"           "start_station_name" "start_station_id"  
##  [7] "end_station_name"   "end_station_id"     "start_lat"         
## [10] "start_lng"          "end_lat"            "end_lng"           
## [13] "member_casual"
nrow(trips_2021)      # qty of records
## [1] 5595063
dim(trips_2021)       # dimensions
## [1] 5595063      13
head(trips_2021)      # first rows
##            ride_id rideable_type          started_at            ended_at
## 1 E19E6F1B8D4C42ED electric_bike 2021-01-23 16:14:19 2021-01-23 16:24:44
## 2 DC88F20C2C55F27F electric_bike 2021-01-27 18:43:08 2021-01-27 18:47:12
## 3 EC45C94683FE3F27 electric_bike 2021-01-21 22:35:54 2021-01-21 22:37:14
## 4 4FA453A75AE377DB electric_bike 2021-01-07 13:31:13 2021-01-07 13:42:55
## 5 BE5E8EB4E7263A0B electric_bike 2021-01-23 02:24:02 2021-01-23 02:24:45
## 6 5D8969F88C773979 electric_bike 2021-01-09 14:24:07 2021-01-09 15:17:54
##           start_station_name start_station_id end_station_name end_station_id
## 1 California Ave & Cortez St            17660                                
## 2 California Ave & Cortez St            17660                                
## 3 California Ave & Cortez St            17660                                
## 4 California Ave & Cortez St            17660                                
## 5 California Ave & Cortez St            17660                                
## 6 California Ave & Cortez St            17660                                
##   start_lat start_lng end_lat end_lng member_casual
## 1  41.90034 -87.69674   41.89  -87.72        member
## 2  41.90033 -87.69671   41.90  -87.69        member
## 3  41.90031 -87.69664   41.90  -87.70        member
## 4  41.90040 -87.69666   41.92  -87.69        member
## 5  41.90033 -87.69670   41.90  -87.70        casual
## 6  41.90041 -87.69676   41.94  -87.71        casual
str(trips_2021)       # structure
## 'data.frame':    5595063 obs. of  13 variables:
##  $ ride_id           : chr  "E19E6F1B8D4C42ED" "DC88F20C2C55F27F" "EC45C94683FE3F27" "4FA453A75AE377DB" ...
##  $ rideable_type     : chr  "electric_bike" "electric_bike" "electric_bike" "electric_bike" ...
##  $ started_at        : chr  "2021-01-23 16:14:19" "2021-01-27 18:43:08" "2021-01-21 22:35:54" "2021-01-07 13:31:13" ...
##  $ ended_at          : chr  "2021-01-23 16:24:44" "2021-01-27 18:47:12" "2021-01-21 22:37:14" "2021-01-07 13:42:55" ...
##  $ start_station_name: chr  "California Ave & Cortez St" "California Ave & Cortez St" "California Ave & Cortez St" "California Ave & Cortez St" ...
##  $ start_station_id  : chr  "17660" "17660" "17660" "17660" ...
##  $ end_station_name  : chr  "" "" "" "" ...
##  $ end_station_id    : chr  "" "" "" "" ...
##  $ start_lat         : num  41.9 41.9 41.9 41.9 41.9 ...
##  $ start_lng         : num  -87.7 -87.7 -87.7 -87.7 -87.7 ...
##  $ end_lat           : num  41.9 41.9 41.9 41.9 41.9 ...
##  $ end_lng           : num  -87.7 -87.7 -87.7 -87.7 -87.7 ...
##  $ member_casual     : chr  "member" "member" "member" "member" ...
summary(trips_2021)   # summary
##    ride_id          rideable_type       started_at          ended_at        
##  Length:5595063     Length:5595063     Length:5595063     Length:5595063    
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  start_station_name start_station_id   end_station_name   end_station_id    
##  Length:5595063     Length:5595063     Length:5595063     Length:5595063    
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    start_lat       start_lng         end_lat         end_lng      
##  Min.   :41.64   Min.   :-87.84   Min.   :41.39   Min.   :-88.97  
##  1st Qu.:41.88   1st Qu.:-87.66   1st Qu.:41.88   1st Qu.:-87.66  
##  Median :41.90   Median :-87.64   Median :41.90   Median :-87.64  
##  Mean   :41.90   Mean   :-87.65   Mean   :41.90   Mean   :-87.65  
##  3rd Qu.:41.93   3rd Qu.:-87.63   3rd Qu.:41.93   3rd Qu.:-87.63  
##  Max.   :42.07   Max.   :-87.52   Max.   :42.17   Max.   :-87.49  
##                                   NA's   :4771    NA's   :4771    
##  member_casual     
##  Length:5595063    
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 
filter(trips_2021, rideable_type == "") %>%  # no rideable type registered
  count()
##   n
## 1 0
filter(trips_2021, started_at == "") %>%  # no starting time registered
  count()
##   n
## 1 0
filter(trips_2021, ended_at == "") %>%  # no ending time registered
  count()
##   n
## 1 0
filter(trips_2021, member_casual == "") %>%  # no member/casual registered
  count()
##   n
## 1 0
filter(trips_2021, start_station_id == "") %>%  # no start station id registered
  count()
##        n
## 1 690806
filter(trips_2021, start_station_name == "") %>%  # no start station name registered
  count()
##        n
## 1 690809
filter(trips_2021, end_station_id == "") %>%  # no end station id registered
  count()
##        n
## 1 739170
filter(trips_2021, end_station_name == "") %>%  # no end station name registered
  count()
##        n
## 1 739170

Some details:


Data Transformation and Cleaning.

As mentioned before, we want to convert the data type of started_at and ended_at from character to datetime.
We also need to create new columns for the analysis: trip duration, month, day of week, and time of day.

# Parse the character timestamps in started_at and ended_at into date-times.

trips_2021 <- trips_2021 %>%
  mutate(
    started_at = as_datetime(started_at),
    ended_at = as_datetime(ended_at)
  )

# Trip duration in minutes (ended_at minus started_at).
# Columns are referenced by bare name inside mutate(): writing
# `trips_2021$...` there would read the global data frame instead of the
# data flowing through the pipe.

trips_2021 <- trips_2021 %>%
  mutate(trip_duration = difftime(ended_at, started_at, units = "mins"))

max(trips_2021$trip_duration) # longest trip
## Time difference of 55944.15 mins
min(trips_2021$trip_duration) # shortest trip
## Time difference of -58.03333 mins
filter(trips_2021, trip_duration <= 0) %>%  # trips with negative or zero duration
  count()
##     n
## 1 653
# Month in which the trip starts, as an English three-letter abbreviation
# ("Jan" ... "Dec").
# The month number is looked up in R's built-in month.abb constant, so the
# labels are the same whatever the session's date locale is and no
# locale-specific recoding is needed.

trips_2021 <- trips_2021 %>%
  mutate(month = month.abb[lubridate::month(started_at)])

# Day of the week on which the trip starts, as an English name
# ("Monday" ... "Sunday").
# wday() with week_start = 7 numbers the days 1 = Sunday ... 7 = Saturday;
# indexing a fixed vector of names keeps the labels independent of the
# session's date locale, so no locale-specific recoding is needed.

trips_2021 <- trips_2021 %>%
  mutate(
    day_of_week = c(
      "Sunday", "Monday", "Tuesday", "Wednesday",
      "Thursday", "Friday", "Saturday"
    )[lubridate::wday(started_at, week_start = 7)]
  )

# Time of the day at which the trip starts, formatted as "HH:MM".
# started_at is referenced by bare name so mutate() reads the column from
# the piped data rather than from the global data frame.

trips_2021 <- trips_2021 %>%
  mutate(time_of_day = format(started_at, "%H:%M"))

More details:

The longest trip lasts almost 39 days (55,944 minutes), and there are 653 trips with negative or zero duration.
These issues add to the ones described in the previous section.
In real life we should check with the stakeholders and the data team to understand why this is happening.
Since we are not able to consult them, we have to make a decision. Hence:

# Keep only the rows that have no NA in any column.
# The result is stored in a new data frame because rows are being removed.

trips <- trips_2021 %>% drop_na()

# Keep only the trips whose duration is strictly positive.

trips <- trips[trips$trip_duration > 0, ]

# Sanity check: the count of trips with zero or negative duration should be 0.

trips %>%
  filter(trip_duration <= 0) %>%
  count()
##   n
## 1 0

Data Analysis

Trip Duration and Number of Rides

# Trip Duration summary

summary(as.numeric(trips$trip_duration))
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##     0.02     6.75    11.98    20.98    21.73 55944.15
# Comparison by rider type: group the cleaned trips by member_casual.

trips_mem_cas <- drop_na(group_by(trips, member_casual))       # grouped helper table, reused below
summarize(trips_mem_cas, mean_duration = mean(trip_duration))  # mean duration per rider type
## # A tibble: 2 x 2
##   member_casual mean_duration
##   <chr>         <drtn>       
## 1 casual        30.24141 mins
## 2 member        13.35578 mins
trips_mem_cas %>% summarize(median_duration=median(trip_duration)) # median comparison 
## # A tibble: 2 x 2
##   member_casual median_duration
##   <chr>         <drtn>         
## 1 casual        15.95 mins     
## 2 member         9.60 mins
trips_mem_cas %>% summarize(max_duration=max(trip_duration))       # max value comparison 
## # A tibble: 2 x 2
##   member_casual max_duration  
##   <chr>         <drtn>        
## 1 casual        55944.150 mins
## 2 member         1499.933 mins
trips_mem_cas %>% summarize(min_duration=min(trip_duration))       # min value comparison 
## # A tibble: 2 x 2
##   member_casual min_duration   
##   <chr>         <drtn>         
## 1 casual        0.01666667 mins
## 2 member        0.01666667 mins
# Number of rides, mean trip duration and share of rides per rider type.
# The trailing unnamed "%" in mutate() adds a constant column (named `"%"`)
# that shows a percent sign next to percentage_of_rides.
by_qty <- trips %>%
  group_by(member_casual) %>%
  summarize(
    number_of_rides = n(),
    average_duration = mean(trip_duration)
  ) %>%
  mutate(
    percentage_of_rides = 100 * number_of_rides / sum(number_of_rides),
    "%"
  )

print(by_qty)
## # A tibble: 2 x 5
##   member_casual number_of_rides average_duration percentage_of_rides `"%"`
##   <chr>                   <int> <drtn>                         <dbl> <chr>
## 1 casual                2525174 30.24141 mins                   45.2 %    
## 2 member                3064466 13.35578 mins                   54.8 %
# Rides and mean trip duration per rider type and month, sorted so that the
# busiest months come first within each rider type.

by_month <- trips %>%
  group_by(member_casual, month) %>%
  summarize(
    number_of_rides = n(),
    average_duration = mean(trip_duration)
  ) %>%
  arrange(member_casual, desc(number_of_rides))

print(by_month, n = 24)  # show all 24 rider-type/month rows
## # A tibble: 24 x 4
## # Groups:   member_casual [2]
##    member_casual month number_of_rides average_duration
##    <chr>         <chr>           <int> <drtn>          
##  1 casual        Jul            441428 31.43849 mins   
##  2 casual        Aug            412047 27.42168 mins   
##  3 casual        Jun            370111 35.64996 mins   
##  4 casual        Sep            363417 26.67066 mins   
##  5 casual        Oct            256787 24.17718 mins   
##  6 casual        May            256549 36.98005 mins   
##  7 casual        Apr            136403 36.61120 mins   
##  8 casual        Nov            106741 20.08875 mins   
##  9 casual        Mar             83918 36.88276 mins   
## 10 casual        Dec             69606 21.28581 mins   
## 11 casual        Jan             18095 24.46770 mins   
## 12 casual        Feb             10072 44.45105 mins   
## 13 member        Sep            392028 13.48432 mins   
## 14 member        Aug            391492 13.83279 mins   
## 15 member        Jul            380169 14.00684 mins   
## 16 member        Oct            373885 12.25872 mins   
## 17 member        Jun            358701 14.33747 mins   
## 18 member        May            274578 14.42756 mins   
## 19 member        Nov            252960 11.08939 mins   
## 20 member        Apr            200522 14.45630 mins   
## 21 member        Dec            177769 10.83773 mins   
## 22 member        Mar            144399 13.79401 mins   
## 23 member        Jan             78631 12.49477 mins   
## 24 member        Feb             39332 15.48586 mins
# Grouping by Member/Casual and Day of week

# Ride count and mean trip duration for each user type / weekday pair, listed
# from the busiest to the quietest day within each user type.
by_day <- trips %>%
  group_by(member_casual, day_of_week) %>%
  summarise(
    number_of_rides = n(),
    average_duration = mean(trip_duration)
  ) %>%
  arrange(member_casual, desc(number_of_rides))
print(by_day)
## # A tibble: 14 x 4
## # Groups:   member_casual [2]
##    member_casual day_of_week number_of_rides average_duration
##    <chr>         <chr>                 <int> <drtn>          
##  1 casual        Saturday             557121 32.77432 mins   
##  2 casual        Sunday               480300 35.33692 mins   
##  3 casual        Friday               363542 28.61203 mins   
##  4 casual        Monday               285959 30.28861 mins   
##  5 casual        Thursday             285646 26.05190 mins   
##  6 casual        Wednesday            278578 26.23160 mins   
##  7 casual        Tuesday              274028 26.71657 mins   
##  8 member        Wednesday            476961 12.62816 mins   
##  9 member        Tuesday              465312 12.57173 mins   
## 10 member        Thursday             451316 12.52827 mins   
## 11 member        Friday               446180 13.05322 mins   
## 12 member        Saturday             432808 14.92451 mins   
## 13 member        Monday               415995 12.94237 mins   
## 14 member        Sunday               375894 15.25352 mins
# Grouping by Member/Casual and Time of day (hours - 00 to 23)

# The hour is taken from the first two characters of `time_of_day`; ride count
# and mean trip duration are then computed per user type and hour.
by_time <- trips %>%
  mutate(hour_of_day = substr(time_of_day, 1, 2)) %>%
  group_by(member_casual, hour_of_day) %>%
  summarise(
    number_of_rides = n(),
    average_duration = mean(trip_duration)
  ) %>%
  arrange(member_casual, desc(number_of_rides))
print(by_time, n = 48)
## # A tibble: 48 x 4
## # Groups:   member_casual [2]
##    member_casual hour_of_day number_of_rides average_duration
##    <chr>         <chr>                 <int> <drtn>          
##  1 casual        17                   236278 28.28389 mins   
##  2 casual        18                   213538 28.42006 mins   
##  3 casual        16                   205045 30.03258 mins   
##  4 casual        15                   188374 32.46934 mins   
##  5 casual        14                   178243 33.34981 mins   
##  6 casual        13                   173182 33.05681 mins   
##  7 casual        19                   166047 28.58016 mins   
##  8 casual        12                   161865 32.17344 mins   
##  9 casual        11                   135734 32.09181 mins   
## 10 casual        20                   121880 29.81915 mins   
## 11 casual        10                   104544 31.55252 mins   
## 12 casual        21                   103614 29.83077 mins   
## 13 casual        22                    96142 30.22666 mins   
## 14 casual        09                    76069 27.26587 mins   
## 15 casual        23                    73949 31.40175 mins   
## 16 casual        08                    63475 22.58154 mins   
## 17 casual        00                    53784 31.19633 mins   
## 18 casual        07                    46678 20.22503 mins   
## 19 casual        01                    39128 33.79116 mins   
## 20 casual        06                    25696 19.64747 mins   
## 21 casual        02                    25457 37.76655 mins   
## 22 casual        03                    14016 38.12476 mins   
## 23 casual        05                    12519 21.66969 mins   
## 24 casual        04                     9917 38.79013 mins   
## 25 member        17                   320154 14.03325 mins   
## 26 member        18                   271295 13.85518 mins   
## 27 member        16                   257423 13.87235 mins   
## 28 member        15                   200993 13.81801 mins   
## 29 member        19                   194182 13.62962 mins   
## 30 member        12                   180567 13.18884 mins   
## 31 member        13                   177797 13.42395 mins   
## 32 member        14                   174627 13.87374 mins   
## 33 member        08                   173161 12.01245 mins   
## 34 member        11                   155238 13.44880 mins   
## 35 member        07                   149627 12.15826 mins   
## 36 member        20                   131567 13.46876 mins   
## 37 member        09                   130794 12.40158 mins   
## 38 member        10                   129183 13.26205 mins   
## 39 member        21                    97152 13.16339 mins   
## 40 member        06                    81797 12.04041 mins   
## 41 member        22                    74532 13.30599 mins   
## 42 member        23                    51644 13.14761 mins   
## 43 member        00                    33116 12.99434 mins   
## 44 member        05                    30013 11.50798 mins   
## 45 member        01                    21882 14.08890 mins   
## 46 member        02                    12440 13.96086 mins   
## 47 member        04                     8135 12.74671 mins   
## 48 member        03                     7147 14.06828 mins
# Grouping by Member/Casual and Rideable Type

# Ride count and mean trip duration for each user type / rideable type pair,
# most used rideable first within each user type.
by_rideable <- trips %>%
  group_by(member_casual, rideable_type) %>%
  summarise(
    number_of_rides = n(),
    average_duration = mean(trip_duration)
  ) %>%
  arrange(member_casual, desc(number_of_rides))
print(by_rideable)
## # A tibble: 6 x 4
## # Groups:   member_casual [2]
##   member_casual rideable_type number_of_rides average_duration
##   <chr>         <chr>                   <int> <drtn>          
## 1 casual        classic_bike          1263331 26.248159 mins  
## 2 casual        electric_bike          949803 19.829425 mins  
## 3 casual        docked_bike            312040 78.101056 mins  
## 4 member        classic_bike          1982939 13.717218 mins  
## 5 member        electric_bike         1081526 12.693101 mins  
## 6 member        docked_bike                 1  2.633333 mins

Top 30 Starting Stations

# Casual users

# Thirty start stations with the most casual rides; trips whose start station
# name is empty are left out by the query.
by_casual_start_st <- sqldf("SELECT start_station_name, COUNT(member_casual) AS casual_riders
      FROM trips
      WHERE start_station_name != '' and member_casual = 'casual' 
      GROUP BY start_station_name
      ORDER BY casual_riders DESC
      LIMIT 30")
print(by_casual_start_st)
##                    start_station_name casual_riders
## 1             Streeter Dr & Grand Ave         66268
## 2                     Millennium Park         33498
## 3               Michigan Ave & Oak St         29746
## 4                      Shedd Aquarium         23220
## 5                 Theater on the Lake         21322
## 6               Wells St & Concord Ln         19874
## 7           Lake Shore Dr & Monroe St         19589
## 8              Clark St & Lincoln Ave         17016
## 9                   Wells St & Elm St         16644
## 10         Indiana Ave & Roosevelt Rd         16603
## 11                  Clark St & Elm St         16453
## 12  DuSable Lake Shore Dr & Monroe St         16215
## 13            Clark St & Armitage Ave         16188
## 14             Wabash Ave & Grand Ave         16130
## 15               New St & Illinois St         15388
## 16                     Dusable Harbor         15202
## 17         Lake Shore Dr & North Blvd         14836
## 18 DuSable Lake Shore Dr & North Blvd         14785
## 19             Michigan Ave & Lake St         14683
## 20       Michigan Ave & Washington St         14289
## 21              Michigan Ave & 8th St         13624
## 22          Larrabee St & Webster Ave         13323
## 23           Wells St & Evergreen Ave         13154
## 24              Clark St & Newport St         12998
## 25           Wilton Ave & Belmont Ave         12970
## 26               Broadway & Barry Ave         12954
## 27           Fairbanks Ct & Grand Ave         12886
## 28           LaSalle St & Illinois St         12687
## 29              Dearborn St & Erie St         12543
## 30                Buckingham Fountain         12355
# Member users

# Thirty start stations with the most member rides; trips whose start station
# name is empty are left out by the query.
by_member_start_st <- sqldf("SELECT start_station_name, COUNT(member_casual) AS member_riders
      FROM trips
      WHERE start_station_name != '' and member_casual = 'member' 
      GROUP BY start_station_name
      ORDER BY member_riders DESC
      LIMIT 30")
print(by_member_start_st)
##              start_station_name member_riders
## 1             Clark St & Elm St         24728
## 2         Wells St & Concord Ln         23707
## 3      Kingsbury St & Kinzie St         23551
## 4             Wells St & Elm St         21014
## 5         Dearborn St & Erie St         19579
## 6           Wells St & Huron St         19184
## 7        St. Clair St & Erie St         18889
## 8          Broadway & Barry Ave         17793
## 9       Clinton St & Madison St         16907
## 10    Desplaines St & Kinzie St         16814
## 11      Clark St & Armitage Ave         16696
## 12       Wabash Ave & Grand Ave         16608
## 13       Clark St & Lincoln Ave         16345
## 14      Streeter Dr & Grand Ave         16341
## 15        Green St & Madison St         16004
## 16          Theater on the Lake         15481
## 17 Clinton St & Washington Blvd         15213
## 18        Wells St & Hubbard St         15200
## 19     Wilton Ave & Belmont Ave         14970
## 20    Larrabee St & Webster Ave         14770
## 21        Michigan Ave & Oak St         14561
## 22    Clark St & Wrightwood Ave         14375
## 23    Ashland Ave & Division St         14337
## 24          Ellis Ave & 60th St         14221
## 25  Dearborn Pkwy & Delaware Pl         14152
## 26     Loomis St & Lexington St         13999
## 27       Kingsbury St & Erie St         13944
## 28  Lincoln Ave & Fullerton Ave         13579
## 29      Broadway & Waveland Ave         13574
## 30    Wabash Ave & Roosevelt Rd         13544

Data Visualization

# General parameters

# Palette, border colours and caption text shared by the charts below.
casual_color <- "#54d6b8" # green
member_color <- "#ab54d6" # purple
border_panel <- "grey"
border_plot <- "grey"
captions <- "Data from January to December 2021"

# Common theme tweaks: centred title over the panel, right-aligned caption
# relative to the whole plot, and a border around both panel and plot.
theme_options <- theme(
  plot.title.position = "panel",
  plot.title = element_text(hjust = 0.5),
  plot.caption.position = "plot",
  plot.caption = element_text(hjust = 1),
  panel.background = element_rect(color = border_panel),
  plot.background = element_rect(color = border_plot)
)
# Rides Distribution (percentage)

# Pie chart of the share of rides per user type: a single stacked bar drawn in
# polar coordinates, with the user type and its percentage labelled on each slice.
ggplot(by_qty, aes(x = 2, y = percentage_of_rides, fill = member_casual)) +
  geom_col(width = 0.5, color = "white", size = 1) +
  geom_label(
    aes(label = paste0(member_casual, "\n", round(percentage_of_rides, 2), "%")),
    position = position_stack(vjust = 0.5),  # centre each label inside its slice
    color = "white",
    size = 4.5,
    label.size = 0,
    show.legend = FALSE
  ) +
  coord_polar(theta = "y", start = 0) +
  scale_fill_manual(values = c(casual_color, member_color)) +
  labs(
    title = "Rides Distribution",
    caption = captions,
    fill = ""
  ) +
  theme_void() +
  theme(
    plot.title.position = "panel",
    plot.title = element_text(hjust = 0.5, margin = unit(c(0.5, 0, 0, 0), "cm")),
    plot.caption.position = "plot",
    plot.caption = element_text(hjust = 1.18, margin = unit(c(0, 1, 2, 0), "mm")),
    plot.margin = unit(c(0, 1.5, 0, 1.5), "cm"),
    plot.background = element_rect(color = border_plot),
    legend.position = c(1, 0.3)
  )

# By quantity

# Bar chart of the total number of rides per user type (left half of the
# side-by-side comparison, hence no caption here).
plot_by_qty <- ggplot(
  by_qty,
  aes(x = member_casual, y = number_of_rides, fill = member_casual)
) +
  geom_col(width = 0.5) +
  scale_fill_manual(values = c(casual_color, member_color)) +
  scale_y_continuous(expand = expansion(mult = c(0.01, 0.05))) +
  labs(
    title = "Total Number of Rides",
    x = "User",
    y = "Number of rides",
    fill = ""
  ) +
  theme(
    plot.title.position = "panel",
    plot.title = element_text(hjust = 0.5),
    plot.caption.position = "plot",
    plot.caption = element_text(hjust = 1),
    panel.background = element_rect(color = border_panel)
  )

# By duration

# Bar chart of the mean trip duration per user type; the duration is converted
# to a plain number so it can be used on a continuous y axis.
plot_by_dur <- ggplot(
  by_qty,
  aes(x = member_casual, y = as.numeric(average_duration), fill = member_casual)
) +
  geom_col(width = 0.5) +
  scale_fill_manual(values = c(casual_color, member_color)) +
  scale_y_continuous(expand = expansion(mult = c(0.01, 0.05))) +
  labs(
    title = "Average Duration",
    caption = captions,
    x = "User",
    y = "Avg. duration (minutes)",
    fill = ""
  ) +
  theme(
    plot.title.position = "panel",
    plot.title = element_text(hjust = 0.5),
    plot.caption.position = "plot",
    plot.caption = element_text(hjust = 1),
    panel.background = element_rect(color = border_panel)
  )

# Plots together

# Quantity and duration charts side by side, wrapped in a shared border.
qty_and_duration <- plot_grid(plot_by_qty, plot_by_dur, ncol = 2, align = "hv")
qty_and_duration + panel_border(color = border_panel)

# By Density

# Only trips shorter than 120 minutes are kept, to be able to visualize density.
less_than_120 <- trips %>%
  filter(trip_duration < 120) %>%
  select(member_casual, trip_duration)

# Overlaid, semi-transparent density curves of trip duration, one per user type.
ggplot(
  less_than_120,
  aes(x = as.numeric(trip_duration), group = member_casual, fill = member_casual)
) +
  geom_density(adjust = 12, alpha = 0.5) +
  scale_fill_manual(values = c(casual_color, member_color)) +
  scale_x_continuous(expand = expansion(mult = c(0, 0.00)), n.breaks = 12) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.05))) +
  labs(
    title = "Trip Duration Density by User",
    caption = paste0("*Considering only rides shorter than 120 minutes", "\n\n", captions),
    x = "Trip duration (minutes)",
    y = "Density",
    fill = ""
  ) +
  theme_options

## Please note that the means shown in this figure are lower than those calculated earlier because only trips shorter than 120 minutes are considered here. ##
# By month

# months in chronological order
by_month[["month"]] <- factor(by_month[["month"]], levels = month.abb)

# Number of rides per month, one facet per user type.
# NOTE(review): `width` is passed to ggplot() rather than geom_col(); it does
# not appear to affect the bar width there - confirm the intent.
ggplot(by_month, aes(x = month, y = number_of_rides, fill = member_casual), width = 0.5) +
  geom_col(position = "dodge") +
  facet_wrap(~member_casual) +
  scale_fill_manual(values = c(casual_color, member_color)) +
  scale_y_continuous(expand = expansion(mult = c(0.01, 0.05))) +
  labs(
    title = "Number of Rides by Month",
    caption = captions,
    x = "Month",
    y = "Number of rides",
    fill = ""
  ) +
  theme_options +
  theme(axis.text.x = element_text(angle = 50, vjust = 0.6))

# Mean trip duration per month, one facet per user type.
# NOTE(review): `width` is passed to ggplot() rather than geom_col(); it does
# not appear to affect the bar width there - confirm the intent.
ggplot(by_month, aes(x = month, y = as.numeric(average_duration), fill = member_casual), width = 0.5) +
  geom_col(position = "dodge") +
  facet_wrap(~member_casual) +
  scale_fill_manual(values = c(casual_color, member_color)) +
  scale_y_continuous(expand = expansion(mult = c(0.01, 0.05))) +
  labs(
    title = "Average Trip Duration by Month",
    caption = captions,
    x = "Month",
    y = "Avg. duration (minutes)",
    fill = ""
  ) +
  theme_options +
  theme(axis.text.x = element_text(angle = 50, vjust = 0.6))

# By day of week

# days in chronological order (Monday first)
week_days <- c("Monday", "Tuesday", "Wednesday", "Thursday",
               "Friday", "Saturday", "Sunday")
by_day$day_of_week <- factor(by_day$day_of_week, levels = week_days, ordered = TRUE)

# Number of rides per weekday, one facet per user type.
# NOTE(review): `width` is passed to ggplot() rather than geom_col(); it does
# not appear to affect the bar width there - confirm the intent.
ggplot(by_day, aes(x = day_of_week, y = number_of_rides, fill = member_casual), width = 0.5) +
  geom_col(position = "dodge") +
  facet_wrap(~member_casual) +
  scale_fill_manual(values = c(casual_color, member_color)) +
  scale_y_continuous(expand = expansion(mult = c(0.01, 0.05))) +
  labs(
    title = "Number of Rides by Day",
    caption = captions,
    x = "Day of week",
    y = "Number of rides",
    fill = ""
  ) +
  theme_options +
  theme(axis.text.x = element_text(angle = 50, vjust = 0.6))

# Mean trip duration per weekday, one facet per user type.
# NOTE(review): `width` is passed to ggplot() rather than geom_col(); it does
# not appear to affect the bar width there - confirm the intent.
ggplot(by_day, aes(x = day_of_week, y = as.numeric(average_duration), fill = member_casual), width = 0.5) +
  geom_col(position = "dodge") +
  facet_wrap(~member_casual) +
  scale_fill_manual(values = c(casual_color, member_color)) +
  scale_y_continuous(expand = expansion(mult = c(0.01, 0.05))) +
  labs(
    title = "Average Trip Duration by Day",
    caption = captions,
    x = "Day of week",
    y = "Avg. duration (minutes)",
    fill = ""
  ) +
  theme_options +
  theme(axis.text.x = element_text(angle = 50, vjust = 0.6))

# Comparison in scatter plot

# Mean trip duration per weekday with both user types on the same panel.
# The transparency is a fixed setting, so it is given to the layer directly:
# a constant placed inside aes() is treated as data and rescaled by the alpha
# scale (and also produces a legend entry that then has to be hidden).
by_day %>%
  ggplot(aes(x = day_of_week, y = as.numeric(average_duration), color = member_casual)) +
  geom_point(alpha = 0.8, shape = 19, size = 6) +
  labs(title = "Comparison: Average Trip Duration by Day",
       caption = captions,
       x = "Day of week",
       y = "Avg. duration (minutes)",
       color = "") +
  scale_color_manual(values = c(casual_color, member_color)) +
  scale_y_continuous(n.breaks = 6) +
  theme_options

# By time of day

# Re-sort the hourly summary by user type and then by hour.
by_time <- by_time %>% arrange(member_casual, hour_of_day)

# Number of rides per hour of the day, one facet per user type.
# NOTE(review): `width` is passed to ggplot() rather than geom_col(); it does
# not appear to affect the bar width there - confirm the intent.
ggplot(by_time, aes(x = hour_of_day, y = number_of_rides, fill = member_casual), width = 0.5) +
  geom_col(position = "dodge") +
  facet_wrap(~member_casual) +
  scale_fill_manual(values = c(casual_color, member_color)) +
  scale_y_continuous(expand = expansion(mult = c(0.01, 0.05))) +
  labs(
    title = "Number of Rides by Hour",
    caption = captions,
    x = "Time of day",
    y = "Number of rides",
    fill = ""
  ) +
  theme_options +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.6))

# Mean trip duration per hour of the day, one facet per user type.
# NOTE(review): `width` is passed to ggplot() rather than geom_col(); it does
# not appear to affect the bar width there - confirm the intent.
ggplot(by_time, aes(x = hour_of_day, y = as.numeric(average_duration), fill = member_casual), width = 0.5) +
  geom_col(position = "dodge") +
  facet_wrap(~member_casual) +
  scale_fill_manual(values = c(casual_color, member_color)) +
  scale_y_continuous(expand = expansion(mult = c(0.01, 0.05))) +
  labs(
    title = "Average Trip Duration by Hour",
    caption = captions,
    x = "Time of day",
    y = "Avg. duration (minutes)",
    fill = ""
  ) +
  theme_options +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.6))

# By rideable

# Shorten the rideable labels ("classic_bike" -> "classic", ...) for the axes.
by_rideable <- mutate(
  by_rideable,
  rideable_type = recode(
    rideable_type,
    classic_bike = "classic",
    docked_bike = "docked",
    electric_bike = "electric"
  )
)
                                
# Number of rides per rideable type, one facet per user type (left half of the
# side-by-side comparison, hence no caption here).
# NOTE(review): `width` is passed to ggplot() rather than geom_col(); it does
# not appear to affect the bar width there - confirm the intent.
plot_rideable_num <- ggplot(
  by_rideable,
  aes(x = rideable_type, y = number_of_rides, fill = member_casual),
  width = 0.5
) +
  geom_col(position = "dodge") +
  facet_wrap(~member_casual) +
  scale_fill_manual(values = c(casual_color, member_color)) +
  scale_y_continuous(expand = expansion(mult = c(0.01, 0.05))) +
  labs(
    title = "Number of Rides by Rideable",
    x = "Rideable type",
    y = "Number of rides",
    fill = ""
  ) +
  theme(
    axis.text.x = element_text(angle = 50, vjust = 0.6),
    plot.title.position = "panel",
    plot.title = element_text(hjust = 0.5),
    plot.caption.position = "plot",
    plot.caption = element_text(hjust = 1),
    panel.background = element_rect(color = border_panel)
  )

# Mean trip duration per rideable type, one facet per user type.
# NOTE(review): `width` is passed to ggplot() rather than geom_col(); it does
# not appear to affect the bar width there - confirm the intent.
plot_rideable_avg <- ggplot(
  by_rideable,
  aes(x = rideable_type, y = as.numeric(average_duration), fill = member_casual),
  width = 0.5
) +
  geom_col(position = "dodge") +
  facet_wrap(~member_casual) +
  scale_fill_manual(values = c(casual_color, member_color)) +
  scale_y_continuous(expand = expansion(mult = c(0.01, 0.05))) +
  labs(
    title = "Average Trip Duration by Rideable",
    caption = captions,
    x = "Rideable type",
    y = "Avg. duration (minutes)",
    fill = ""
  ) +
  theme(
    axis.text.x = element_text(angle = 50, vjust = 0.6),
    plot.title.position = "panel",
    plot.title = element_text(hjust = 0.5),
    plot.caption.position = "plot",
    plot.caption = element_text(hjust = 1),
    panel.background = element_rect(color = border_panel)
  )

# Both rideable charts side by side, wrapped in a shared border.
rideable_charts <- plot_grid(plot_rideable_num, plot_rideable_avg, ncol = 2, align = "hv")
rideable_charts + panel_border(color = border_panel)

# By day and rideable

# Ride count and mean trip duration per user type, rideable type and weekday,
# sorted by decreasing mean duration within each user type / rideable pair.
by_day_rideable <- trips %>%
  group_by(member_casual, rideable_type, day_of_week) %>%
  summarise(
    number_of_rides = n(),
    average_duration = mean(trip_duration)
  ) %>%
  arrange(member_casual, rideable_type, desc(average_duration))

# Weekdays as an ordered factor (Monday first) so the x axis is chronological.
by_day_rideable$day_of_week <- factor(
  by_day_rideable$day_of_week,
  levels = c("Monday", "Tuesday", "Wednesday", "Thursday",
             "Friday", "Saturday", "Sunday"),
  ordered = TRUE
)

# Shorten the rideable labels for the legend.
by_day_rideable <- mutate(
  by_day_rideable,
  rideable_type = recode(
    rideable_type,
    classic_bike = "classic",
    docked_bike = "docked",
    electric_bike = "electric"
  )
)

# Average duration

# Mean trip duration per weekday; colour encodes the user type and point shape
# encodes the rideable type.
# The transparency is a fixed setting, so it is given to the layer directly:
# a constant placed inside aes() is treated as data and rescaled by the alpha
# scale (and also produces a legend entry that then has to be hidden).
by_day_rideable %>%
  ggplot(aes(x = day_of_week, y = as.numeric(average_duration),
             color = member_casual, shape = rideable_type)) +
  geom_point(alpha = 0.9, size = 4) +
  labs(title = "Comparison: Average Trip Duration by Day and Rideable",
       caption = captions,
       x = "Day of week",
       y = "Avg. duration (minutes)",
       color = "Color",
       shape = "Shape") +
  scale_color_manual(values = c(casual_color, member_color)) +
  scale_shape_manual(values = c(18, 15, 17)) +
  scale_y_continuous(n.breaks = 6) +
  theme_options

# Number of rides

# Ride count per weekday; colour encodes the user type and point shape encodes
# the rideable type.
# The transparency is a fixed setting, so it is given to the layer directly:
# a constant placed inside aes() is treated as data and rescaled by the alpha
# scale (and also produces a legend entry that then has to be hidden).
by_day_rideable %>%
  ggplot(aes(x = day_of_week, y = number_of_rides,
             color = member_casual, shape = rideable_type)) +
  geom_point(alpha = 0.9, size = 4) +
  labs(title = "Comparison: Number of Rides by Day and Rideable",
       caption = captions,
       x = "Day of week",
       y = "Number of rides",
       color = "Color",
       shape = "Shape") +
  scale_color_manual(values = c(casual_color, member_color)) +
  scale_shape_manual(values = c(18, 15, 17)) +
  theme_options

Most used starting stations and starting positions

# Wordclouds

# One word cloud per user type built from the top-30 start-station tables:
# word size follows the ride count and the per-row alpha vector (30 values,
# 0.96 down to 0.09) fades the words in table order.
set.seed(111)
wc_cas <- ggplot(by_casual_start_st, aes(label = start_station_name, size = casual_riders)) +
  geom_text_wordcloud(area_corr = TRUE, color = casual_color, alpha = seq(0.96, 0.08, -0.03)) +
  scale_size_area(max_size = 6) +
  theme(panel.background = element_rect(color = "white", fill = "white"))

# The colour is a fixed setting on the layer; `colors` is not an aesthetic, so
# it is not listed in aes().
set.seed(222)
wc_mem <- ggplot(by_member_start_st, aes(label = start_station_name, size = member_riders)) +
  geom_text_wordcloud(area_corr = TRUE, color = member_color, alpha = seq(0.96, 0.08, -0.03)) +
  scale_size_area(max_size = 5.5) +
  theme(panel.background = element_rect(color = "white", fill = "white"))


# Casual cloud on the left, member cloud on the right.
plot_grid(wc_cas, wc_mem, ncol = 2, align = "hv", rel_widths = c(1, 1)) +
  panel_border(color = "white")

# Top ten stations - Casual users

# First ten rows of the top-30 table, which is already sorted by ride count.
by_casual_start_st_10 <- sqldf("SELECT * FROM by_casual_start_st LIMIT 10")

# Bars are ordered by decreasing ride count. The fill colour is a constant, so
# it is set on the layer instead of being mapped through aes() and a one-value
# manual scale; with nothing mapped to fill there is no legend to hide.
# The alpha vector has exactly ten values (0.92 down to 0.20), one per row.
by_casual_start_st_10 %>%
  ggplot(aes(x = reorder(start_station_name, -casual_riders), y = casual_riders)) +
  geom_col(fill = casual_color, alpha = seq(0.92, 0.2, -0.08)) +
  labs(title = "Top ten starting stations for Casual users",
       caption = captions,
       x = "Station",
       y = "Number of rides") +
  scale_y_continuous(expand = expansion(mult = c(0.01, 0.05))) +
  theme(axis.text.x = element_text(angle = 50, vjust = 0.6)) +
  theme(legend.position = "none", plot.margin = unit(c(5, 8, 5, 8), "mm")) +
  theme_options

# Top ten stations - Member users

# First ten rows of the top-30 table, which is already sorted by ride count.
by_member_start_st_10 <- sqldf("SELECT * FROM by_member_start_st LIMIT 10")

# Bars are ordered by decreasing ride count. The fill colour is a constant, so
# it is set on the layer instead of being mapped through aes() and a one-value
# manual scale; with nothing mapped to fill there is no legend to hide.
# The alpha vector has exactly ten values (0.92 down to 0.20), one per row.
by_member_start_st_10 %>%
  ggplot(aes(x = reorder(start_station_name, -member_riders), y = member_riders)) +
  geom_col(fill = member_color, alpha = seq(0.92, 0.2, -0.08)) +
  labs(title = "Top 10 starting stations for Member users",
       caption = captions,
       x = "Station",
       y = "Number of rides") +
  scale_y_continuous(expand = expansion(mult = c(0.01, 0.05))) +
  theme(axis.text.x = element_text(angle = 50, vjust = 0.6)) +
  theme(legend.position = "none", plot.margin = unit(c(5, 8, 5, 8), "mm")) +
  theme_options

# Top 30 Starting Positions

# Add a `start_position` key ("<lat> <lng>") to every trip. `remove = FALSE`
# keeps the original start_lat / start_lng columns, which the queries below
# still select, without building a second copy of the table.
trips_start_position <- trips %>%
  unite("start_position", start_lat, start_lng, sep = " ", remove = FALSE)

# Thirty most used start positions for casual riders.
by_casual_start_pos <- sqldf("SELECT start_lat, start_lng, start_position, member_casual, COUNT(member_casual) AS riders
      FROM trips_start_position
      WHERE member_casual = 'casual' 
      GROUP BY start_position
      ORDER BY riders DESC
      LIMIT 30")

# Thirty most used start positions for members.
by_member_start_pos <- sqldf("SELECT start_lat, start_lng, start_position, member_casual, COUNT(member_casual) AS riders
      FROM trips_start_position
      WHERE member_casual = 'member' 
      GROUP BY start_position
      ORDER BY riders DESC
      LIMIT 30")

# Casual rows first, then member rows, for plotting on a single map.
by_start_position <- bind_rows(by_casual_start_pos, by_member_start_pos)

# Map of Chicago

# Bounding box (decimal degrees) of the area drawn as background map.
chicago_bounds <- c(
  left = -87.8020,
  bottom = 41.7771,
  right = -87.5334,
  top = 41.9756
)

# Terrain tiles for that box.
# NOTE(review): Stamen tile hosting and get_stamenmap() may no longer be
# available in current ggmap releases - confirm before re-running.
chicago_map <- get_stamenmap(bbox = chicago_bounds, zoom = 12, maptype = "terrain")

# Most used start positions of both user types on the lightened base map;
# point size follows the ride count and colour the user type.
start_points <- geom_point(
  data = by_start_position,
  mapping = aes(x = start_lng, y = start_lat, color = member_casual, size = riders),
  alpha = .6
)

ggmap(chicago_map, darken = c(0.5, "white")) +
  start_points +
  scale_color_manual(values = c(casual_color, member_color)) +
  scale_size_continuous(range = c(2, 8)) +
  labs(
    title = "Most popular starting positions",
    caption = paste0("\n", captions),
    color = ""
  ) +
  theme_map() +
  theme(
    plot.margin = unit(c(6, 2, 2, 20), "mm"),
    plot.title = element_text(size = 13, vjust = 3.5, face = "plain"),
    plot.caption = element_text(size = 9),
    legend.text = element_text(size = 10.5, vjust = 0.7)
  ) +
  # hide the size legend and enlarge the colour legend keys
  guides(size = "none", color = guide_legend(override.aes = list(size = 4))) +
  theme_options


In Summary

Table I. Top 30 starting stations.
Casual Member
Streeter Dr & Grand Ave Clark St & Elm St
Millennium Park Wells St & Concord Ln
Michigan Ave & Oak St Kingsbury St & Kinzie St
Shedd Aquarium Wells St & Elm St
Theater on the Lake Dearborn St & Erie St
Wells St & Concord Ln Wells St & Huron St
Lake Shore Dr & Monroe St St. Clair St & Erie St
Clark St & Lincoln Ave Broadway & Barry Ave
Wells St & Elm St Clinton St & Madison St
Indiana Ave & Roosevelt Rd Desplaines St & Kinzie St
Clark St & Elm St Clark St & Armitage Ave
DuSable Lake Shore Dr & Monroe St Wabash Ave & Grand Ave
Clark St & Armitage Ave Clark St & Lincoln Ave
Wabash Ave & Grand Ave Streeter Dr & Grand Ave
New St & Illinois St Green St & Madison St
Dusable Harbor Theater on the Lake
Lake Shore Dr & North Blvd Clinton St & Washington Blvd
DuSable Lake Shore Dr & North Blvd Wells St & Hubbard St
Michigan Ave & Lake St Wilton Ave & Belmont Ave
Michigan Ave & Washington St Larrabee St & Webster Ave
Michigan Ave & 8th St Michigan Ave & Oak St
Larrabee St & Webster Ave Clark St & Wrightwood Ave
Wells St & Evergreen Ave Ashland Ave & Division St
Clark St & Newport St Ellis Ave & 60th St
Wilton Ave & Belmont Ave Dearborn Pkwy & Delaware Pl
Broadway & Barry Ave Loomis St & Lexington St
Fairbanks Ct & Grand Ave Kingsbury St & Erie St
LaSalle St & Illinois St Lincoln Ave & Fullerton Ave
Dearborn St & Erie St Broadway & Waveland Ave
Buckingham Fountain Wabash Ave & Roosevelt Rd

Insights and Recommendations


Where to find the target users

The main starting stations and starting positions where the greatest number of trips by Casual users meet the conditions described in the previous section are detailed below.

# Starting stations for Target Casual users

# Thirty start stations with the most casual rides that match the target
# profile: not a docked bike, shorter than 20 minutes, Monday to Thursday,
# outside July and August.
# The `month` column holds abbreviated month names (e.g. 'Jul', 'Aug'), so the
# exclusion has to compare against the abbreviations; comparing against the
# full names never matches and would filter nothing out.
target_casual_st <- sqldf("SELECT start_station_name, COUNT(member_casual) AS rides
      FROM trips
      WHERE member_casual = 'casual' AND 
        start_station_name != '' AND
        rideable_type != 'docked_bike' AND
        trip_duration < 20 AND
        (day_of_week = 'Monday' OR
        day_of_week = 'Tuesday' OR
        day_of_week = 'Wednesday' OR
        day_of_week = 'Thursday') AND
        (month != 'Jul' AND month != 'Aug')
      GROUP BY start_station_name
      ORDER BY rides DESC
      LIMIT 30")

# Table 2 - Starting stations for Target Casual users

# Keep only the station names, under the heading "Stations", and render them
# as a captioned table.
target_casual_table <- target_casual_st %>%
  select(Stations = start_station_name) %>%
  data.frame()
kable(target_casual_table, caption = "Table II. Top 30 starting stations for Target Casual users.")
Table II. Top 30 starting stations for Target Casual users.
Stations
Streeter Dr & Grand Ave
Wells St & Concord Ln
Wells St & Elm St
Clark St & Elm St
Clark St & Lincoln Ave
Wells St & Huron St
Wells St & Evergreen Ave
Clark St & Armitage Ave
Kingsbury St & Kinzie St
Dearborn St & Erie St
Larrabee St & Webster Ave
Wilton Ave & Belmont Ave
Broadway & Barry Ave
Millennium Park
Wabash Ave & Grand Ave
LaSalle St & Illinois St
Ashland Ave & Division St
Michigan Ave & Oak St
Green St & Madison St
Desplaines St & Kinzie St
Lincoln Ave & Fullerton Ave
Clark St & Newport St
Clark St & Drummond Pl
Clark St & Wrightwood Ave
Dearborn Pkwy & Delaware Pl
DuSable Lake Shore Dr & North Blvd
Wells St & Hubbard St
Halsted St & Roscoe St
New St & Illinois St
Michigan Ave & Lake St
# Starting positions for Target Casual users

# Thirty start positions with the most casual rides that match the target
# profile: not a docked bike, shorter than 20 minutes, Monday to Thursday,
# outside July and August.
# The user-type filter selects 'casual' riders, in line with the station query
# for the same target group. The `month` column holds abbreviated month names
# (e.g. 'Jul', 'Aug'), so the exclusion compares against the abbreviations.
target_casual_pos <- sqldf("SELECT start_lat, start_lng, start_position, COUNT(member_casual) AS rides
      FROM trips_start_position
      WHERE member_casual = 'casual' AND 
        start_position != '' AND
        rideable_type != 'docked_bike' AND
        trip_duration < 20 AND
        (day_of_week = 'Monday' OR
        day_of_week = 'Tuesday' OR
        day_of_week = 'Wednesday' OR
        day_of_week = 'Thursday') AND
        (month != 'Jul' AND month != 'Aug')
      GROUP BY start_position
      ORDER BY rides DESC
      LIMIT 30") 

# Map + Wordcloud

# Target start positions on the darkened base map; point size follows the
# ride count and the size legend is hidden.
target_points <- geom_point(
  data = target_casual_pos,
  mapping = aes(x = start_lng, y = start_lat, size = rides),
  color = casual_color,
  alpha = .6
)

plot_target_pos <- ggmap(chicago_map, darken = c(0.4, "black")) +
  target_points +
  scale_size_continuous(range = c(2, 8)) +
  guides(size = "none") +
  labs(caption = paste0("\n", captions)) +
  theme_map() +
  theme(plot.caption = element_text(hjust = 1.6, size = 10))

# Word cloud of the target start stations, word size following the ride count;
# the alpha vector supplies one value per row of the 30-row table.
set.seed(333)
wc_target_pos <- ggplot(target_casual_st, aes(label = start_station_name, size = rides)) +
  geom_text_wordcloud(area_corr = TRUE, color = casual_color, alpha = seq(0.96, 0.08, -0.03)) +
  scale_size_area(max_size = 7) +
  theme(panel.background = element_rect(color = "white", fill = "white"))

# Title drawn as its own grid row.
plot_title <- ggdraw() +
  draw_label("Most popular starting stations and positions for Target Users", fontface = 'plain')

# Rows, top to bottom: empty spacer, title, word cloud, map.
target_layout <- plot_grid(
  NULL, plot_title, wc_target_pos, plot_target_pos,
  nrow = 4,
  align = "hv",
  rel_heights = c(0.05, 0.05, 0.5, 1)
)
target_layout + panel_border(color = border_panel)

## Please notice that stations and positions do not necessarily match ##

Matching starting stations and starting positions.

Even though preferred starting stations and starting positions differ between Casual and Member users, there are several in common, as shown below.
In this case, the 40 most popular starting stations and the 40 most popular starting positions for Casual users are compared to the 30 most popular starting stations and the 20 most popular starting positions for Members, considering only trips from Monday to Thursday.

# Matching starting stations
#
# Builds the Monday-to-Thursday station rankings for each user type and keeps
# the stations present in both rankings.

# 40 most popular starting stations for Casual users (Monday to Thursday).
casual_start_st_mon_thu <- sqldf("SELECT start_station_name, COUNT(member_casual) AS riders
      FROM trips
      WHERE start_station_name != '' AND member_casual = 'casual' AND
        day_of_week IN ('Monday', 'Tuesday', 'Wednesday', 'Thursday')
      GROUP BY start_station_name
      ORDER BY riders DESC
      LIMIT 40")   # 40 most popular

# 30 most popular starting stations for Member users (Monday to Thursday).
member_start_st_mon_thu <- sqldf("SELECT start_station_name, COUNT(member_casual) AS riders
      FROM trips
      WHERE start_station_name != '' AND member_casual = 'member' AND
        day_of_week IN ('Monday', 'Tuesday', 'Wednesday', 'Thursday')
      GROUP BY start_station_name
      ORDER BY riders DESC
      LIMIT 30")   # 30 most popular

# Short aliases used as table names inside the join query below.
st_left <- casual_start_st_mon_thu
st_right <- member_start_st_mon_thu

# Stations appearing in both rankings, with the ride count of each user type
# ('riders' in the rankings is a count of rides, hence the *_rides names).
st_join <- sqldf('SELECT st_left.start_station_name, st_left.riders AS casual_rides, st_right.riders AS member_rides
      FROM st_left 
      INNER JOIN st_right 
      ON st_left.start_station_name = st_right.start_station_name')

# Table 3 - Starting stations in common

# Single-column table with the names of the stations found in both rankings.
match_st_table <- select(st_join, "Stations" = start_station_name)

# kable() is namespace-qualified because knitr is not attached in the package
# loading chunk. The count in the caption is taken from the data so that it
# always matches the number of rows actually shown.
knitr::kable(
  match_st_table,
  caption = paste0("Table III. Top ", nrow(match_st_table),
                   " starting stations in common.")
)
Table III. Top 15 starting stations in common.
Stations
Wells St & Concord Ln
Wells St & Elm St
Clark St & Elm St
Clark St & Lincoln Ave
Wabash Ave & Grand Ave
Clark St & Armitage Ave
Dearborn St & Erie St
Wells St & Huron St
Broadway & Barry Ave
St. Clair St & Erie St
Kingsbury St & Kinzie St
Ashland Ave & Division St
Green St & Madison St
Columbus Dr & Randolph St
Dearborn Pkwy & Delaware Pl
# Matching starting positions
#
# Builds the Monday-to-Thursday starting-position rankings for each user type
# and keeps the positions present in both rankings.
# NOTE(review): start_lat / start_lng are bare columns under GROUP BY, so
# SQLite returns them from one row of each group -- assumes coordinates are
# constant within a start_position; verify.

# 40 most popular starting positions for Casual users (Monday to Thursday).
casual_start_pos_mon_thu <- sqldf("SELECT start_lat, start_lng, start_position, COUNT(member_casual) AS riders
      FROM trips_start_position
      WHERE member_casual = 'casual' AND 
        day_of_week IN ('Monday', 'Tuesday', 'Wednesday', 'Thursday')
      GROUP BY start_position
      ORDER BY riders DESC
      LIMIT 40")   # 40 most popular

# 20 most popular starting positions for Member users (Monday to Thursday).
member_start_pos_mon_thu <- sqldf("SELECT start_lat, start_lng, start_position, COUNT(member_casual) AS riders
      FROM trips_start_position
      WHERE member_casual = 'member' AND 
        day_of_week IN ('Monday', 'Tuesday', 'Wednesday', 'Thursday')
      GROUP BY start_position
      ORDER BY riders DESC
      LIMIT 20")  # 20 most popular

# Short aliases used as table names inside the join query below.
pos_left <- casual_start_pos_mon_thu
pos_right <- member_start_pos_mon_thu

# Positions appearing in both rankings; coordinates come from the Casual side.
pos_join <- sqldf('SELECT pos_left.start_position, pos_left.start_lat, pos_left.start_lng, 
        pos_left.riders AS casual_rides, pos_right.riders AS member_rides
      FROM pos_left 
      INNER JOIN pos_right 
      ON pos_left.start_position = pos_right.start_position')

# Map + Wordcloud

# Map: one point per starting position shared by both user types, sized by
# the number of Casual rides.
plot_mon_thu <- ggmap(chicago_map, darken = c(0.4, "black")) +
  geom_point(data = pos_join,
             aes(x = start_lng, y = start_lat, size = casual_rides),
             color = casual_color,
             alpha = .6) +
  labs(caption = paste0("\n", captions)) +
  scale_size_continuous(range = c(2, 8)) +
  theme_map() +
  guides(size = "none") +
  # Stray trailing comma (empty argument) removed from this call.
  theme(plot.caption = element_text(hjust = 1.6, size = 10))

# One alpha value per shared station: starts at 0.96, drops 0.06 per row and
# never goes below 0.08. The length follows the join result, so the plot no
# longer breaks when the number of shared stations is not 15 (for 15 rows the
# values are exactly those of seq(0.96, 0.08, -0.06)).
wc_mon_thu_alpha <- pmax(
  0.08,
  seq(0.96, by = -0.06, length.out = nrow(st_join))
)

# Fix the RNG seed before building the word cloud.
set.seed(333)
wc_cas_mon_thu <- ggplot(st_join, aes(label = start_station_name, size = casual_rides)) +
  geom_text_wordcloud(area_corr = TRUE, color = casual_color, alpha = wc_mon_thu_alpha) +
  scale_size_area(max_size = 8) +
  theme(panel.background = element_rect(color = "white", fill = "white"))

plot_title <- ggdraw() +
  draw_label(paste0("Most popular starting stations and positions for Casual users", "\n", 
                    "matching most popular ones for Member users (Monday to Thursday)"),
             fontface = 'plain')

# Stack vertically: spacer, title, word cloud, map.
plot_grid(NULL, plot_title, wc_cas_mon_thu, plot_mon_thu, nrow = 4, align = "hv",
          rel_heights = c(0.08, 0.05, 0.5, 1)) +
  panel_border(color = border_panel)

## Please note that stations and positions do not necessarily match ##

Thanks

Thank you for reading!

This is my first project, so your comments and suggestions will be greatly appreciated!


Packages Citation

The following packages were used.

# Citations

citation("knitr")
## 
## To cite the 'knitr' package in publications use:
## 
##   Yihui Xie (2020). knitr: A General-Purpose Package for Dynamic Report
##   Generation in R. R package version 1.30.
## 
##   Yihui Xie (2015) Dynamic Documents with R and knitr. 2nd edition.
##   Chapman and Hall/CRC. ISBN 978-1498716963
## 
##   Yihui Xie (2014) knitr: A Comprehensive Tool for Reproducible
##   Research in R. In Victoria Stodden, Friedrich Leisch and Roger D.
##   Peng, editors, Implementing Reproducible Computational Research.
##   Chapman and Hall/CRC. ISBN 978-1466561595
## 
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
citation("tidyverse")
## 
##   Wickham et al., (2019). Welcome to the tidyverse. Journal of Open
##   Source Software, 4(43), 1686, https://doi.org/10.21105/joss.01686
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {Welcome to the {tidyverse}},
##     author = {Hadley Wickham and Mara Averick and Jennifer Bryan and Winston Chang and Lucy D'Agostino McGowan and Romain François and Garrett Grolemund and Alex Hayes and Lionel Henry and Jim Hester and Max Kuhn and Thomas Lin Pedersen and Evan Miller and Stephan Milton Bache and Kirill Müller and Jeroen Ooms and David Robinson and Dana Paige Seidel and Vitalie Spinu and Kohske Takahashi and Davis Vaughan and Claus Wilke and Kara Woo and Hiroaki Yutani},
##     year = {2019},
##     journal = {Journal of Open Source Software},
##     volume = {4},
##     number = {43},
##     pages = {1686},
##     doi = {10.21105/joss.01686},
##   }
citation("dplyr") 
## 
## To cite package 'dplyr' in publications use:
## 
##   Hadley Wickham, Romain François, Lionel Henry and Kirill Müller
##   (2021). dplyr: A Grammar of Data Manipulation. R package version
##   1.0.7. https://CRAN.R-project.org/package=dplyr
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {dplyr: A Grammar of Data Manipulation},
##     author = {Hadley Wickham and Romain François and Lionel Henry and Kirill Müller},
##     year = {2021},
##     note = {R package version 1.0.7},
##     url = {https://CRAN.R-project.org/package=dplyr},
##   }
citation("lubridate")
## 
## To cite lubridate in publications use:
## 
##   Garrett Grolemund, Hadley Wickham (2011). Dates and Times Made Easy
##   with lubridate. Journal of Statistical Software, 40(3), 1-25. URL
##   https://www.jstatsoft.org/v40/i03/.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {Dates and Times Made Easy with {lubridate}},
##     author = {Garrett Grolemund and Hadley Wickham},
##     journal = {Journal of Statistical Software},
##     year = {2011},
##     volume = {40},
##     number = {3},
##     pages = {1--25},
##     url = {https://www.jstatsoft.org/v40/i03/},
##   }
citation("sqldf") 
## 
## To cite package 'sqldf' in publications use:
## 
##   G. Grothendieck (2017). sqldf: Manipulate R Data Frames Using SQL. R
##   package version 0.4-11. https://CRAN.R-project.org/package=sqldf
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {sqldf: Manipulate R Data Frames Using SQL},
##     author = {G. Grothendieck},
##     year = {2017},
##     note = {R package version 0.4-11},
##     url = {https://CRAN.R-project.org/package=sqldf},
##   }
## 
## ATTENTION: This citation information has been auto-generated from the
## package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
citation("ggplot2") 
## 
## To cite ggplot2 in publications, please use:
## 
##   H. Wickham. ggplot2: Elegant Graphics for Data Analysis.
##   Springer-Verlag New York, 2016.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Book{,
##     author = {Hadley Wickham},
##     title = {ggplot2: Elegant Graphics for Data Analysis},
##     publisher = {Springer-Verlag New York},
##     year = {2016},
##     isbn = {978-3-319-24277-4},
##     url = {https://ggplot2.tidyverse.org},
##   }
citation("cowplot") 
## 
## To cite package 'cowplot' in publications use:
## 
##   Claus O. Wilke (2020). cowplot: Streamlined Plot Theme and Plot
##   Annotations for 'ggplot2'. R package version 1.1.1.
##   https://CRAN.R-project.org/package=cowplot
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {cowplot: Streamlined Plot Theme and Plot Annotations for 'ggplot2'},
##     author = {Claus O. Wilke},
##     year = {2020},
##     note = {R package version 1.1.1},
##     url = {https://CRAN.R-project.org/package=cowplot},
##   }
citation("wordcloud")
## 
## To cite package 'wordcloud' in publications use:
## 
##   Ian Fellows (2018). wordcloud: Word Clouds. R package version 2.6.
##   https://CRAN.R-project.org/package=wordcloud
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {wordcloud: Word Clouds},
##     author = {Ian Fellows},
##     year = {2018},
##     note = {R package version 2.6},
##     url = {https://CRAN.R-project.org/package=wordcloud},
##   }
## 
## ATTENTION: This citation information has been auto-generated from the
## package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
citation("ggwordcloud")
## 
## To cite package 'ggwordcloud' in publications use:
## 
##   Erwan Le Pennec and Kamil Slowikowski (2019). ggwordcloud: A Word
##   Cloud Geom for 'ggplot2'. R package version 0.5.0.
##   https://CRAN.R-project.org/package=ggwordcloud
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {ggwordcloud: A Word Cloud Geom for 'ggplot2'},
##     author = {Erwan {Le Pennec} and Kamil Slowikowski},
##     year = {2019},
##     note = {R package version 0.5.0},
##     url = {https://CRAN.R-project.org/package=ggwordcloud},
##   }
citation("ggmap")
## 
## To cite ggmap in publications, please use:
## 
##   D. Kahle and H. Wickham. ggmap: Spatial Visualization with ggplot2.
##   The R Journal, 5(1), 144-161. URL
##   http://journal.r-project.org/archive/2013-1/kahle-wickham.pdf
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     author = {David Kahle and Hadley Wickham},
##     title = {ggmap: Spatial Visualization with ggplot2},
##     journal = {The R Journal},
##     year = {2013},
##     volume = {5},
##     number = {1},
##     pages = {144--161},
##     url = {https://journal.r-project.org/archive/2013-1/kahle-wickham.pdf},
##   }